version 16
clear all
cd "D:\AGN 2023 RCFS"
set more off
capture log close _all
log using logs\BS.smcl, append name(mainlog)
local lognum = string(clock("$S_DATE $S_TIME","DMY hms"), "%tcCCYY_NN_DD_HH_MM_SS")
log using "logs\sublogs\BSLog_`lognum'.smcl", append name(secondarylog)

******************************
******************************
*General instructions
	*Four do files should be run in sequence
	*As written will generate all main paper and appendix tables *except* A.1 and A.7
	*User is able to delete, with no ramifications, all code in between the appendix code begin/end tags and the appendix tables will not be generated
	*Instructions for tables A.1 and A.7 are included in-line at the appropriate place
	*Code assumes the following folder structure exists:
		*D:\AGN 2023 RCFS\logs\sublogs\
		*D:\AGN 2023 RCFS\output\
		*D:\AGN 2023 RCFS\FHLMC\
		*D:\AGN 2023 RCFS\FNMA\
	*Please direct all questions to https://www.darrenaiello.com/contact
	*Input files (where "year of numlist 2003/2014" and "quarter of numlist 1/4"):
		*D:\AGN 2023 RCFS\LAR_Full_`year'.csv
			*Annual extracts from HMDA executables
			*Plus additional fields appended from BlackBox performance data
				*based on the match between HMDA and BBx
			*Only headers provided
		*D:\AGN 2023 RCFS\`year'tid.csv
			*Tax id extracts from HMDA executables
			*Only headers provided
		*D:\AGN 2023 RCFS\2008countylimit.csv
			*2008 variable conforming limits by county
		*D:\AGN 2023 RCFS\2008msalimit
			*2008 variable conforming limits by msa
		*D:\AGN 2023 RCFS\2009plusConformingLimits.csv
			*2009 and later county level conforming limits	
		*D:\AGN 2023 RCFS\FHLMC\historical_data1_Q`quarter'`year'.txt
			*Core performance data from the FreddieMac Single Family Loan-Level Dataset
			*https://www.freddiemac.com/research/datasets/sf-loanlevel-dataset
			*Data underlies the match process between HMDA and FHLMC 
				*as well as the loan origination and performance data for FHLMC loans
			*Files not provided as they do not contain headers
		*D:\AGN 2023 RCFS\FHLMC_HMDA_Match_Update.csv
			*Sourced from the FHLMC dataset
			*Contains loan origination and performance data
			*Only headers provided
		*D:\AGN 2023 RCFS\FHLMC\FINAL_MATCHES_`year'.csv
			*The results index file for the match between the FHLMC and HMDA datasets
			*Only headers provided
		*D:\AGN 2023 RCFS\FNMA\Acquisition_`year'Q`quarter'.txt
			*Core performance data from the Fannie Mae Single-Family Loan Performace Data
			*https://capitalmarkets.fanniemae.com/credit-risk-transfer/single-family-credit-risk-transfer/fannie-mae-single-family-loan-performance-data
			*Data underlies the match process between HMDA and FNMA
				*as well as the loan origination and performance data for FNMA loans
			*Files not provided as they do not contain headers
		*D:\AGN 2023 RCFS\FNMA_HMDA_Match_Update.csv
			*Sourced from the FNMA dataset
			*Contains loan origination and performance data
			*Only headers provided
		*D:\AGN 2023 RCFS\FNMA\FINAL_MATCHES_`year'.csv
			*The results index file for the match between the FNMA and HMDA datasets
			*Only headers provided
		*D:\AGN 2023 RCFS\LAR_Loan_Chars_Update.csv
			*Origination information pulled from BBx to be appended to the HMDA data
			*Only headers provided
******************************
******************************
		clear all
	insheet using 2009plusConformingLimits.csv, comma
			rename state state_code
			rename county county_code
			rename limit conflimit
				compress
	save 2009plusConformingLimits, replace

		clear all
	insheet using 2008countylimit.csv, comma
			gen year = 2008
				compress
	save 2008countylimit, replace

		clear all
	insheet using 2008msalimit.csv, comma
			gen year = 2008
				compress
	save 2008msalimit, replace

	foreach fileyear in 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 {
			clear all
		insheet using `fileyear'tid.csv, comma
			tostring agency_code, replace
			gen tempstrlen = strlen(respondent_id)
			gen resp_id = (substr("0000000000",1,10-tempstrlen) + respondent_id)
			replace respondent_id = resp_id
			drop resp_id tempstrlen
			rename as_of_year year
				compress
		save hmda_tid_`fileyear', replace
	}

		clear all
	insheet using LAR_Loan_Chars_Update.csv, comma
		tostring agency_code, replace
		rename as_of_year year	
		egen unique_id=concat(year agency_code respondent_id sequence_number), punct("-")
		drop year agency_code respondent_id sequence_number
		gen bbx_data_temp = 1
			compress
	save hmda_LC_Update, replace
	
		clear all
	insheet using FNMA_HMDA_Match_Update.csv, comma
		tostring agency_code, replace
		rename as_of_year year	
		egen unique_id=concat(year agency_code respondent_id sequence_number), punct("-")
		drop year agency_code respondent_id sequence_number
		gen fnma_data_temp = 1
			compress
	save hmda_FNMA_Update, replace

		clear all
	insheet using FHLMC_HMDA_Match_Update.csv, comma
		tostring agency_code, replace
		rename as_of_year year	
		egen unique_id=concat(year agency_code respondent_id sequence_number), punct("-")
		drop year agency_code respondent_id sequence_number
		gen fhlmc_data_temp = 1
			compress
	save hmda_FHLMC_Update, replace

	******************************
	*Appendix Only Code <begin>***
	******************************
						clear all
				gen str12 fhlmc_loanidentifier=""
				gen str1 channel=""
			save BS_FHLMC_Channel, replace
				
			foreach year of numlist 2003/2014 {
				foreach quarter of numlist 1/4 {
					
						clear all
					insheet using FHLMC\historical_data1_Q`quarter'`year'.txt, delimiter("|")
					
					keep v14 v20
					
					rename v14 channel
					rename v20 fhlmc_loanidentifier
					
					append using BS_FHLMC_Channel
						compress
					save BS_FHLMC_Channel, replace
				}
			}
			
					clear all
				gen double fnma_loanidentifier=.
				gen str1 channel=""
			save BS_FNMA_Channel, replace
			
			foreach year of numlist 2003/2014 {
				foreach quarter of numlist 1/4 {
					
						clear all
					insheet using FNMA\Acquisition_`year'Q`quarter'.txt, delimiter("|")
					
						keep v1 v2
						
						rename v2 channel
						rename v1 fnma_loanidentifier
						
						append using BS_FNMA_Channel
							compress
					save BS_FNMA_Channel, replace
				}
			}
				
					clear all
				gen str12 fhlmc_loanidentifier=""
				gen str10 hmda_respondent_id=""
				gen byte hmda_agency_code=.
				gen long hmda_sequence_number=.
				gen long year=.
			save BS_FHLMC_Final_Matches, replace
			
			foreach year of numlist 2003/2014 {
					
					clear all
				insheet using FHLMC\FINAL_MATCHES_`year'.csv, comma
				
					gen year = `year'
					
					append using BS_FHLMC_Final_Matches
						compress
				save BS_FHLMC_Final_Matches, replace
			}
			
			merge m:1 fhlmc_loanidentifier using BS_FHLMC_Channel
				keep if _merge==3
				drop _merge
			
			drop fhlmc_loanidentifier
			
			rename hmda_respondent_id respondent_id
			rename hmda_agency_code agency_code
			rename hmda_sequence_number sequence_number
			
				compress
			save BS_Channels_FHLMC, replace
			
			
					clear all
				gen double fnma_loanidentifier=. 
				gen str10 hmda_respondent_id=""
				gen byte hmda_agency_code=.
				gen long hmda_sequence_number=.
				gen long year = .
			save BS_FNMA_Final_Matches, replace
			
			foreach year of numlist 2003/2014 {
					
					clear all
				insheet using FNMA\FINAL_MATCHES_`year'.csv, comma
				
					gen year = `year'
					
					append using BS_FNMA_Final_Matches
						compress
				save BS_FNMA_Final_Matches, replace
			}
			
			merge m:1 fnma_loanidentifier using BS_FNMA_Channel
				keep if _merge==3
				drop _merge
			
			drop fnma_loanidentifier
			
			rename hmda_respondent_id respondent_id
			rename hmda_agency_code agency_code
			rename hmda_sequence_number sequence_number
			
				compress
			save BS_Channels_FNMA, replace
			
				append using BS_Channels_FHLMC
				
				gen byte retail = 0
					replace retail = 1 if channel=="R"
				
				collapse (max) retail, by(year respondent_id agency_code sequence_number)
				
				compress
			save BS_Channels, replace
	******************************
	*Appendix Only Code <end>*****
	******************************
	
	foreach fileyear in 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 {
			clear all
		insheet using LAR_Full_`fileyear'.csv, comma

			rename loan_amount_000s loanamt
			rename applicant_income_000s income
			rename action_type outcome
			rename as_of_year year
			
			keep if outcome<=5
			
			gen originated=(outcome==1)

			gen appsocial=applicant_race_1
				replace appsocial=6 if appsocial==7
				replace appsocial=7 if applicant_ethnicity==1

			gen pctloanamt=(loanamt>41)+(loanamt>70)+(loanamt>95)+(loanamt>119)+(loanamt>144)+(loanamt>173)+(loanamt>210)+(loanamt>266)+(loanamt>360)  if loanamt<.
			
			tostring agency_code, replace
			
			merge m:1 year agency_code respondent_id using hmda_tid_`fileyear'
				drop if _merge==2
				drop _merge
			
			rename tax_id bank
			egen unique_id=concat(year agency_code respondent_id sequence_number), punct("-")
			drop respondent_id agency_code
			
			merge m:1 state_code county_code year using 2009plusConformingLimits
				drop if _merge==2
				drop _merge
			
			merge m:1 state_code county_code year using 2008countylimit
				drop if _merge==2
				drop _merge
	
			merge m:1 msa_md year using 2008msalimit
				drop if _merge==2
				drop _merge
		
			gen byte jumbo=0 

				replace jumbo=1 if (loanamt>=323 & year==2003 & state_code != 2 & state_code != 15)
				replace jumbo=1 if (loanamt>=334 & year==2004 & state_code != 2 & state_code != 15)
				replace jumbo=1 if (loanamt>=360 & year==2005 & state_code != 2 & state_code != 15)
				replace jumbo=1 if (loanamt>=417 & year==2006 & state_code != 2 & state_code != 15)
				replace jumbo=1 if (loanamt>=417 & year==2007 & state_code != 2 & state_code != 15)
				replace jumbo=1 if (loanamt>=417 & year==2008 & state_code != 2 & state_code != 15)

				replace jumbo=1 if (loanamt>=484 & year==2003 & (state_code == 2 | state_code == 15))
				replace jumbo=1 if (loanamt>=501 & year==2004 & (state_code == 2 | state_code == 15))
				replace jumbo=1 if (loanamt>=539 & year==2005 & (state_code == 2 | state_code == 15))
				replace jumbo=1 if (loanamt>=626 & year==2006 & (state_code == 2 | state_code == 15))
				replace jumbo=1 if (loanamt>=626 & year==2007 & (state_code == 2 | state_code == 15))
				replace jumbo=1 if (loanamt>=626 & year==2008 & (state_code == 2 | state_code == 15)) & countylimit2008==. & msalimit2008==.

				replace jumbo=1 if (loanamt>=conflimit & year>=2009 & conflimit<.)
				replace jumbo=1 if state_code==2 & year==2010 & conflimit==. & (county_code==105 | county_code==230) & loanamt>=417
				
				replace jumbo=1 if year==2008 & loanamt>=countylimit2008 & countylimit2008<.
				replace jumbo=1 if year==2008 & loanamt>=msalimit2008 & msalimit2008<.
		
				drop countylimit2008 msalimit2008 conflimit
				
			gen byte bbx_data = 0
			gen byte fnma_data = 0
			gen byte fhlmc_data = 0
			
			merge 1:1 unique_id using hmda_LC_Update
				drop if _merge == 2 
				drop _merge

				replace bbx_data = bbx_data_temp if bbx_data_temp<.
				
				drop bbx_data_temp

			merge 1:1 unique_id using hmda_FNMA_Update
				drop if _merge == 2 
				drop _merge
				
				replace ever60dq = dq if dq<. & ever60dq==.
				replace everdflt = dflt if dflt<. & everdflt==.
				replace ficoscoreoriginationcalc = fico if fico<. & ficoscoreoriginationcalc==.
				replace origltvratiocalc = ltv if ltv<. & origltvratiocalc==.
				replace amortizationtermcal = term if term<. & amortizationtermcal==.
				replace origintrtcalc = rate if rate<. & origintrtcalc==.
				replace fnma_data=fnma_data_temp if fnma_data_temp<.
				
				drop dq dflt fico ltv term rate fnma_data_temp
				
			merge 1:1 unique_id using hmda_FHLMC_Update
				drop if _merge == 2 
				drop _merge

				replace ever60dq = dq if dq<. & ever60dq==.
				replace everdflt = dflt if dflt<. & everdflt==.
				replace ficoscoreoriginationcalc = fico if fico<. & ficoscoreoriginationcalc==.
				replace origltvratiocalc = ltv if ltv<. & origltvratiocalc==.
				replace amortizationtermcal = term if term<. & amortizationtermcal==.
				replace origintrtcalc = rate if rate<. & origintrtcalc==.
				replace fhlmc_data=fhlmc_data_temp if fhlmc_data_temp<.

				drop dq dflt fico ltv term rate fhlmc_data_temp
				
				******************************
				*Appendix Only Code <begin>***
				******************************
						*For Table A.7 uncomment:
							*keep if jumbo==0
				******************************
				*Appendix Only Code <end>*****
				******************************
				
				compress
		save hmda`fileyear', replace

			egen tract=concat(pctloanamt state_code county_code census_tract_number), punct("-")

			gen rat=loanamt/income
			
			gen denied=(outcome==3)

			by bank tract, sort: egen dealcount=total(originated)
			by bank tract, sort: egen appcount=count(outcome)
			by bank tract, sort: egen voltotal=total(loanamt*(originated==1))
			by bank tract, sort: egen deniedcount=total(denied)

			egen banktract=group(bank tract)

			collapse (mean) dealcount appcount voltotal deniedcount (first) bank tract, by(banktract)

				compress
		save mkt`fileyear', replace

	}

		clear all
		gen int year = .
	foreach fileyear in 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 {
		append using mkt`fileyear'
		replace year = `fileyear' if year==.
	}
				compress
		save mktcurr, replace

		preserve
				replace year=year-1
				drop if year==2002
			save mktfut, replace
		restore
		
			replace year=year+1
			drop if year==2015
		save mktprev, replace
